data <- read_csv("/Users/jorgevargasmutizabal/Desktop/Frog game statistics/R Analysis Frog/filtered_data.csv")
## New names:
## Rows: 21488 Columns: 9
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (9): ...1, Player_ID, LexTale, Game_Version, Game_Level, Phrase_Conditio...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
str(data)
## spc_tbl_ [21,488 × 9] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ ...1            : num [1:21488] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Player_ID       : num [1:21488] 1 1 1 1 1 1 1 1 1 1 ...
##  $ LexTale         : num [1:21488] 8 8 8 8 8 8 8 8 8 8 ...
##  $ Game_Version    : num [1:21488] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Game_Level      : num [1:21488] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Phrase_Condition: num [1:21488] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Question_Num    : num [1:21488] 0 1 2 3 4 5 6 7 8 9 ...
##  $ Answer          : num [1:21488] 1 1 1 1 1 1 1 1 0 1 ...
##  $ Reaction_Time   : num [1:21488] 1.99 0.8 1.8 0.77 0.82 0.86 0.82 0.85 0.78 1.06 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   ...1 = col_double(),
##   ..   Player_ID = col_double(),
##   ..   LexTale = col_double(),
##   ..   Game_Version = col_double(),
##   ..   Game_Level = col_double(),
##   ..   Phrase_Condition = col_double(),
##   ..   Question_Num = col_double(),
##   ..   Answer = col_double(),
##   ..   Reaction_Time = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
data <- data %>%
  mutate(
    Player_ID = as.factor(Player_ID),          # Convert to factor
    LexTale = as.numeric(LexTale),              # Convert to numeric
    Game_Version = as.factor(Game_Version),     # Convert to factor
    Game_Level = as.factor(Game_Level),        # Convert to factor
    Phrase_Condition = as.factor(Phrase_Condition), # Convert to factor
    Question_Num = as.numeric(Question_Num),    # Convert to numeric
    Answer = as.numeric(Answer),                 # Convert to factor
    Reaction_Time = as.numeric(Reaction_Time)   # Convert to numeric
  )

# Verify changes
str(data)
## tibble [21,488 × 9] (S3: tbl_df/tbl/data.frame)
##  $ ...1            : num [1:21488] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Player_ID       : Factor w/ 38 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ LexTale         : num [1:21488] 8 8 8 8 8 8 8 8 8 8 ...
##  $ Game_Version    : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Game_Level      : Factor w/ 8 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Phrase_Condition: Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Question_Num    : num [1:21488] 0 1 2 3 4 5 6 7 8 9 ...
##  $ Answer          : num [1:21488] 1 1 1 1 1 1 1 1 0 1 ...
##  $ Reaction_Time   : num [1:21488] 1.99 0.8 1.8 0.77 0.82 0.86 0.82 0.85 0.78 1.06 ...

0. What is the average reaction time marginalizing over everything?

Oa: rt ~ 1 + (1 | participant)

model1 <- lmer(Reaction_Time ~ 1 + (1 | Player_ID), data = data)

# Obtain and print a summary of the model to examine fixed and random effects
print(summary(model1))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction_Time ~ 1 + (1 | Player_ID)
##    Data: data
## 
## REML criterion at convergence: 22273.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.4173 -0.7322 -0.1049  0.6110  3.5746 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Player_ID (Intercept) 0.01319  0.1149  
##  Residual              0.16394  0.4049  
## Number of obs: 21488, groups:  Player_ID, 38
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  0.83745    0.01885 37.08626   44.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Diagnostic plots
plot_residuals_diagnostics(model1)

## `geom_smooth()` using formula = 'y ~ x'

# Model diagnostics
check_model(model1)

1. Does learning take place?

1b: rt ~ 1 + time + (1 + time | participant)

model2 <- lmer(Reaction_Time ~ 1 + Question_Num + (1 + Question_Num | Player_ID), data = data)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Model failed to converge with 1 negative eigenvalue: -2.5e+01
# Obtain and print a summary of the model to examine fixed and random effects
print(summary(model2))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction_Time ~ 1 + Question_Num + (1 + Question_Num | Player_ID)
##    Data: data
## 
## REML criterion at convergence: 21846.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6398 -0.7207 -0.0994  0.6078  3.7181 
## 
## Random effects:
##  Groups    Name         Variance  Std.Dev. Corr 
##  Player_ID (Intercept)  3.649e-01 0.604035      
##            Question_Num 2.582e-06 0.001607 -0.96
##  Residual               1.591e-01 0.398879      
## Number of obs: 21488, groups:  Player_ID, 38
## 
## Fixed effects:
##                Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)   8.713e-01  9.814e-02  2.145e+04   8.878   <2e-16 ***
## Question_Num -2.220e-04  2.620e-04  2.009e+01  -0.847    0.407    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## Question_Nm -0.962
## optimizer (nloptwrap) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
#Diagnostic plots
plot_residuals_diagnostics(model2)

## `geom_smooth()` using formula = 'y ~ x'

# Diagnostic checks - Simulating residuals to validate model assumptions
residuals_simulation <- simulateResiduals(fittedModel = model2, n = 500)
plot(residuals_simulation)

# Model diagnostics
check_model(model2)

# Check for influential cases potentially affecting the model
#influence_measures <- influence(model3)
#plot(influence_measures, which = "cook")

# If assumptions are not met, consider re-fitting the model with transformations or different specifications
# model_revised <- update(model, . ~ . + log(Reaction_Time))
# print(summary(model_revised))

# Model interpretation - examining fixed and random effects
#fixed_effects <- fixef(model3)
#random_effects <- ranef(model3)

# Printing fixed effects for interpretation
#print(fixed_effects)

# Printing random effects for interpretation
#print(random_effects)

# Get estimated marginal means for specific values of Question_Num
emm_results <- emmeans(model2, specs = ~ Question_Num, at = list(Question_Num = seq(min(data$Question_Num), max(data$Question_Num), by = 10)))
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
# Summarize the results
summary(emm_results)
##  Question_Num emmean     SE  df asymp.LCL asymp.UCL
##             0  0.871 0.0981 Inf     0.679     1.064
##            10  0.869 0.0956 Inf     0.682     1.057
##            20  0.867 0.0931 Inf     0.684     1.049
##            30  0.865 0.0906 Inf     0.687     1.042
##            40  0.862 0.0881 Inf     0.690     1.035
##            50  0.860 0.0856 Inf     0.692     1.028
##            60  0.858 0.0831 Inf     0.695     1.021
##            70  0.856 0.0806 Inf     0.698     1.014
##            80  0.854 0.0782 Inf     0.700     1.007
##            90  0.851 0.0757 Inf     0.703     1.000
##           100  0.849 0.0733 Inf     0.706     0.993
##           110  0.847 0.0708 Inf     0.708     0.986
##           120  0.845 0.0684 Inf     0.711     0.979
##           130  0.842 0.0660 Inf     0.713     0.972
##           140  0.840 0.0636 Inf     0.716     0.965
##           150  0.838 0.0613 Inf     0.718     0.958
##           160  0.836 0.0589 Inf     0.720     0.951
##           170  0.834 0.0566 Inf     0.723     0.944
##           180  0.831 0.0543 Inf     0.725     0.938
##           190  0.829 0.0520 Inf     0.727     0.931
##           200  0.827 0.0498 Inf     0.729     0.925
##           210  0.825 0.0476 Inf     0.731     0.918
##           220  0.822 0.0454 Inf     0.733     0.912
##           230  0.820 0.0434 Inf     0.735     0.905
##           240  0.818 0.0413 Inf     0.737     0.899
##           250  0.816 0.0394 Inf     0.739     0.893
##           260  0.814 0.0375 Inf     0.740     0.887
##           270  0.811 0.0357 Inf     0.741     0.881
##           280  0.809 0.0340 Inf     0.743     0.876
##           290  0.807 0.0324 Inf     0.743     0.871
##           300  0.805 0.0310 Inf     0.744     0.866
##           310  0.803 0.0298 Inf     0.744     0.861
##           320  0.800 0.0287 Inf     0.744     0.857
##           330  0.798 0.0278 Inf     0.744     0.853
##           340  0.796 0.0272 Inf     0.743     0.849
##           350  0.794 0.0268 Inf     0.741     0.846
##           360  0.791 0.0267 Inf     0.739     0.844
##           370  0.789 0.0268 Inf     0.737     0.842
##           380  0.787 0.0271 Inf     0.734     0.840
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95
# Plot the estimated marginal means
plot(emm_results)

# Create a plot of Reaction_Time vs. Question_Num with a regression line
ggplot(data, aes(x = Question_Num, y = Reaction_Time)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  labs(title = "Reaction Time vs. Question Number",
       x = "Question Number",
       y = "Reaction Time")
## `geom_smooth()` using formula = 'y ~ x'

2. Which condition is the hardest overall?

2a: rt - 1 + condition + (1 + condition | participant)

model3 <- lmer(Reaction_Time ~ 1 + Phrase_Condition + (1 + Phrase_Condition | Player_ID), data = data)

# Obtain and print a summary of the model to examine fixed and random effects
print(summary(model3))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction_Time ~ 1 + Phrase_Condition + (1 + Phrase_Condition |  
##     Player_ID)
##    Data: data
## 
## REML criterion at convergence: 17531.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2202 -0.6870 -0.1202  0.5792  4.1834 
## 
## Random effects:
##  Groups    Name              Variance Std.Dev. Corr             
##  Player_ID (Intercept)       0.01134  0.1065                    
##            Phrase_Condition2 0.02389  0.1546   -0.29            
##            Phrase_Condition3 0.01391  0.1179   -0.30  0.57      
##            Phrase_Condition4 0.03883  0.1971   -0.35  0.64  0.32
##  Residual                    0.12971  0.3601                    
## Number of obs: 21488, groups:  Player_ID, 38
## 
## Fixed effects:
##                   Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)        0.62506    0.01822 37.15675  34.306  < 2e-16 ***
## Phrase_Condition2  0.23231    0.02640 37.20209   8.799 1.27e-10 ***
## Phrase_Condition3  0.03446    0.02078 37.43797   1.659    0.105    
## Phrase_Condition4  0.39736    0.03274 37.12133  12.137 1.73e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Phr_C2 Phr_C3
## Phrs_Cndtn2 -0.332              
## Phrs_Cndtn3 -0.346  0.561       
## Phrs_Cndtn4 -0.378  0.636  0.336
#Diagnostic plots
plot_residuals_diagnostics(model3)

## `geom_smooth()` using formula = 'y ~ x'

# Diagnostic checks - Simulating residuals to validate model assumptions
residuals_simulation <- simulateResiduals(fittedModel = model3, n = 500)
plot(residuals_simulation)

# Model diagnostics
check_model(model3)

# Check for influential cases potentially affecting the model
#influence_measures <- influence(model3)
#plot(influence_measures, which = "cook")

# If assumptions are not met, consider re-fitting the model with transformations or different specifications
# model_revised <- update(model, . ~ . + log(Reaction_Time))
# print(summary(model_revised))

# Model interpretation - examining fixed and random effects
#fixed_effects <- fixef(model3)
#random_effects <- ranef(model3)

# Printing fixed effects for interpretation
#print(fixed_effects)

# Printing random effects for interpretation
#print(random_effects)

# Conducting pairwise comparisons of phrase conditions using estimated marginal means
emm_results <- emmeans(model3, specs = ~ Phrase_Condition)
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
summary(emm_results)
##  Phrase_Condition emmean     SE  df asymp.LCL asymp.UCL
##  1                 0.625 0.0182 Inf     0.589     0.661
##  2                 0.857 0.0266 Inf     0.805     0.910
##  3                 0.660 0.0224 Inf     0.616     0.703
##  4                 1.022 0.0309 Inf     0.962     1.083
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95
pairwise_comparisons <- emmeans(model3, specs = pairwise ~ Phrase_Condition)
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
summary(pairwise_comparisons)
## $emmeans
##  Phrase_Condition emmean     SE  df asymp.LCL asymp.UCL
##  1                 0.625 0.0182 Inf     0.589     0.661
##  2                 0.857 0.0266 Inf     0.805     0.910
##  3                 0.660 0.0224 Inf     0.616     0.703
##  4                 1.022 0.0309 Inf     0.962     1.083
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                              estimate     SE  df z.ratio p.value
##  Phrase_Condition1 - Phrase_Condition2  -0.2323 0.0264 Inf  -8.799  <.0001
##  Phrase_Condition1 - Phrase_Condition3  -0.0345 0.0208 Inf  -1.659  0.3457
##  Phrase_Condition1 - Phrase_Condition4  -0.3974 0.0327 Inf -12.137  <.0001
##  Phrase_Condition2 - Phrase_Condition3   0.1978 0.0227 Inf   8.734  <.0001
##  Phrase_Condition2 - Phrase_Condition4  -0.1651 0.0259 Inf  -6.383  <.0001
##  Phrase_Condition3 - Phrase_Condition4  -0.3629 0.0323 Inf -11.220  <.0001
## 
## Degrees-of-freedom method: asymptotic 
## P value adjustment: tukey method for comparing a family of 4 estimates
plot(emm_results)

# Create a boxplot
ggplot(data, aes(x = Phrase_Condition, y = Reaction_Time, fill = Phrase_Condition)) +
  geom_boxplot() +
  labs(title = "Reaction Times by Phrase Condition", x = "Phrase Condition", y = "Reaction Time (seconds)") +
  theme_minimal()

# Calculate average reaction times per player per condition
avg_reaction_times <- data %>%
  group_by(Player_ID, Phrase_Condition) %>%
  summarise(Avg_Reaction_Time = mean(Reaction_Time), .groups = 'drop')

# Interaction plot
ggplot(avg_reaction_times, aes(x = Phrase_Condition, y = Avg_Reaction_Time, group = Player_ID, color = Phrase_Condition)) +
  geom_line(alpha = 0.5) +  # Slightly transparent to see overlapping lines
  geom_point() +
  labs(title = "Interaction of Phrase Condition and Player on Reaction Times", x = "Phrase Condition", y = "Average Reaction Time (seconds)") +
  theme_minimal()

3. Does the version of the experiment cause different outcomes?

model8 <- lmer(Reaction_Time ~ 1 + Game_Version + (1 | Player_ID), data = data)

# Obtain and print a summary of the model to examine fixed and random effects
print(summary(model8))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction_Time ~ 1 + Game_Version + (1 | Player_ID)
##    Data: data
## 
## REML criterion at convergence: 22280.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.4201 -0.7329 -0.1042  0.6121  3.5759 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Player_ID (Intercept) 0.01211  0.1101  
##  Residual              0.16394  0.4049  
## Number of obs: 21488, groups:  Player_ID, 38
## 
## Fixed effects:
##               Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)    0.80191    0.03720 34.25585  21.554   <2e-16 ***
## Game_Version2  0.09354    0.05260 34.21796   1.778   0.0843 .  
## Game_Version3  0.06630    0.05123 34.11126   1.294   0.2043    
## Game_Version4 -0.01540    0.05121 34.06945  -0.301   0.7654    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Gm_Vr2 Gm_Vr3
## Game_Versn2 -0.707              
## Game_Versn3 -0.726  0.514       
## Game_Versn4 -0.726  0.514  0.528
#Diagnostic plots
plot_residuals_diagnostics(model8)

## `geom_smooth()` using formula = 'y ~ x'

# Diagnostic checks - Simulating residuals to validate model assumptions
residuals_simulation <- simulateResiduals(fittedModel = model8, n = 500)
plot(residuals_simulation)

# Model diagnostics
check_model(model8)

# Check for influential cases potentially affecting the model
#influence_measures <- influence(model4)
#plot(influence_measures, which = "cook")

# If assumptions are not met, consider re-fitting the model with transformations or different specifications
# model_revised <- update(model, . ~ . + log(Reaction_Time))
# print(summary(model_revised))

# Model interpretation - examining fixed and random effects
fixed_effects <- fixef(model8)
random_effects <- ranef(model8)

How does the effect of condition vary depending on the version?

4b: rt ~ 1 + condition * version + (1 + condition | participant)

#This doesnt work:
#model4 <- lmer(Reaction_Time ~ 1 + Phrase_Condition * Game_Version + (1 + Phrase_Condition | Player_ID), data = data)

model4 <- lmer(Reaction_Time ~ 1 + Phrase_Condition * Game_Version + (1 | Player_ID), data = data)

# Obtain and print a summary of the model to examine fixed and random effects
print(summary(model4))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction_Time ~ 1 + Phrase_Condition * Game_Version + (1 | Player_ID)
##    Data: data
## 
## REML criterion at convergence: 18852.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8743 -0.6916 -0.1071  0.5890  4.2127 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Player_ID (Intercept) 0.01441  0.1200  
##  Residual              0.13924  0.3731  
## Number of obs: 21488, groups:  Player_ID, 38
## 
## Fixed effects:
##                                   Estimate Std. Error         df t value
## (Intercept)                      6.556e-01  4.202e-02  3.992e+01  15.601
## Phrase_Condition2                1.356e-01  1.812e-02  2.144e+04   7.488
## Phrase_Condition3               -5.123e-02  1.777e-02  2.144e+04  -2.883
## Phrase_Condition4                3.342e-01  1.574e-02  2.145e+04  21.231
## Game_Version2                   -1.697e-02  5.927e-02  3.951e+01  -0.286
## Game_Version3                   -6.202e-02  5.788e-02  3.980e+01  -1.072
## Game_Version4                   -1.793e-02  5.769e-02  3.928e+01  -0.311
## Phrase_Condition2:Game_Version2  1.793e-01  2.505e-02  2.144e+04   7.156
## Phrase_Condition3:Game_Version2  1.668e-01  2.457e-02  2.144e+04   6.791
## Phrase_Condition4:Game_Version2  1.195e-01  2.185e-02  2.145e+04   5.468
## Phrase_Condition2:Game_Version3  1.806e-01  2.481e-02  2.144e+04   7.282
## Phrase_Condition3:Game_Version3  1.939e-01  2.407e-02  2.144e+04   8.055
## Phrase_Condition4:Game_Version3  8.696e-02  2.108e-02  2.145e+04   4.125
## Phrase_Condition2:Game_Version4 -1.333e-02  2.402e-02  2.144e+04  -0.555
## Phrase_Condition3:Game_Version4  4.042e-02  2.350e-02  2.144e+04   1.720
## Phrase_Condition4:Game_Version4 -4.830e-02  2.038e-02  2.145e+04  -2.370
##                                 Pr(>|t|)    
## (Intercept)                      < 2e-16 ***
## Phrase_Condition2               7.29e-14 ***
## Phrase_Condition3                0.00395 ** 
## Phrase_Condition4                < 2e-16 ***
## Game_Version2                    0.77611    
## Game_Version3                    0.29039    
## Game_Version4                    0.75764    
## Phrase_Condition2:Game_Version2 8.55e-13 ***
## Phrase_Condition3:Game_Version2 1.14e-11 ***
## Phrase_Condition4:Game_Version2 4.60e-08 ***
## Phrase_Condition2:Game_Version3 3.40e-13 ***
## Phrase_Condition3:Game_Version3 8.34e-16 ***
## Phrase_Condition4:Game_Version3 3.72e-05 ***
## Phrase_Condition2:Game_Version4  0.57876    
## Phrase_Condition3:Game_Version4  0.08539 .  
## Phrase_Condition4:Game_Version4  0.01779 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(summary(model4), correlation=TRUE)  or
##     vcov(summary(model4))        if you need it
#Diagnostic plots
plot_residuals_diagnostics(model4)

## `geom_smooth()` using formula = 'y ~ x'

# Diagnostic checks - Simulating residuals to validate model assumptions
residuals_simulation <- simulateResiduals(fittedModel = model4, n = 500)
plot(residuals_simulation)

# Model diagnostics
check_model(model4)

# Check for influential cases potentially affecting the model
#influence_measures <- influence(model4)
#plot(influence_measures, which = "cook")

# If assumptions are not met, consider re-fitting the model with transformations or different specifications
# model_revised <- update(model, . ~ . + log(Reaction_Time))
# print(summary(model_revised))

# Model interpretation - examining fixed and random effects
fixed_effects <- fixef(model4)
random_effects <- ranef(model4)

# Printing fixed effects for interpretation
print(fixed_effects)
##                     (Intercept)               Phrase_Condition2 
##                      0.65558253                      0.13564920 
##               Phrase_Condition3               Phrase_Condition4 
##                     -0.05123020                      0.33421265 
##                   Game_Version2                   Game_Version3 
##                     -0.01697242                     -0.06201693 
##                   Game_Version4 Phrase_Condition2:Game_Version2 
##                     -0.01792551                      0.17929197 
## Phrase_Condition3:Game_Version2 Phrase_Condition4:Game_Version2 
##                      0.16685111                      0.11946684 
## Phrase_Condition2:Game_Version3 Phrase_Condition3:Game_Version3 
##                      0.18063763                      0.19392587 
## Phrase_Condition4:Game_Version3 Phrase_Condition2:Game_Version4 
##                      0.08696084                     -0.01333338 
## Phrase_Condition3:Game_Version4 Phrase_Condition4:Game_Version4 
##                      0.04042475                     -0.04830418
# Printing random effects for interpretation
print(random_effects)
## $Player_ID
##     (Intercept)
## 1  -0.162882039
## 2  -0.145521920
## 3  -0.108868536
## 4   0.104632078
## 5   0.133752086
## 6  -0.024278371
## 7   0.042178400
## 8   0.006710037
## 9   0.154278267
## 10  0.041854998
## 11 -0.004540477
## 12  0.068106551
## 13 -0.041911198
## 14 -0.012500741
## 15  0.163653353
## 16  0.080671832
## 17 -0.285579699
## 18 -0.009754619
## 19 -0.060621845
## 20  0.007955007
## 21  0.055070276
## 22  0.023949859
## 23 -0.155227060
## 24 -0.015719218
## 25  0.073204282
## 26  0.143623779
## 27 -0.077192166
## 28  0.004957085
## 29  0.159977433
## 30 -0.015698051
## 31  0.107812884
## 32  0.076202538
## 33  0.076693016
## 34 -0.156064018
## 35  0.185045375
## 36 -0.074621237
## 37 -0.149749872
## 38 -0.209598068
## 
## with conditional variances for "Player_ID"
# Estimate marginal means for all combinations
emm1 <- emmeans(model4, specs = ~ Phrase_Condition * Game_Version)
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
summary(emm1)
##  Phrase_Condition Game_Version emmean     SE  df asymp.LCL asymp.UCL
##  1                1             0.656 0.0420 Inf     0.573     0.738
##  2                1             0.791 0.0420 Inf     0.709     0.874
##  3                1             0.604 0.0419 Inf     0.522     0.686
##  4                1             0.990 0.0410 Inf     0.909     1.070
##  1                2             0.639 0.0418 Inf     0.557     0.721
##  2                2             0.954 0.0419 Inf     0.871     1.036
##  3                2             0.754 0.0418 Inf     0.672     0.836
##  4                2             1.092 0.0411 Inf     1.012     1.173
##  1                3             0.594 0.0398 Inf     0.516     0.672
##  2                3             0.910 0.0398 Inf     0.832     0.988
##  3                3             0.736 0.0395 Inf     0.659     0.814
##  4                3             1.015 0.0387 Inf     0.939     1.091
##  1                4             0.638 0.0395 Inf     0.560     0.715
##  2                4             0.760 0.0396 Inf     0.682     0.838
##  3                4             0.627 0.0395 Inf     0.549     0.704
##  4                4             0.924 0.0386 Inf     0.848     0.999
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95
# Pairwise comparisons across phrase conditions within each game version
pairs(emm1, simple = "each")
## $`simple contrasts for Phrase_Condition`
## Game_Version = 1:
##  contrast                              estimate     SE  df z.ratio p.value
##  Phrase_Condition1 - Phrase_Condition2  -0.1356 0.0181 Inf  -7.488  <.0001
##  Phrase_Condition1 - Phrase_Condition3   0.0512 0.0178 Inf   2.883  0.0206
##  Phrase_Condition1 - Phrase_Condition4  -0.3342 0.0157 Inf -21.231  <.0001
##  Phrase_Condition2 - Phrase_Condition3   0.1869 0.0178 Inf  10.525  <.0001
##  Phrase_Condition2 - Phrase_Condition4  -0.1986 0.0157 Inf -12.624  <.0001
##  Phrase_Condition3 - Phrase_Condition4  -0.3854 0.0152 Inf -25.283  <.0001
## 
## Game_Version = 2:
##  contrast                              estimate     SE  df z.ratio p.value
##  Phrase_Condition1 - Phrase_Condition2  -0.3149 0.0173 Inf -18.200  <.0001
##  Phrase_Condition1 - Phrase_Condition3  -0.1156 0.0170 Inf  -6.816  <.0001
##  Phrase_Condition1 - Phrase_Condition4  -0.4537 0.0152 Inf -29.943  <.0001
##  Phrase_Condition2 - Phrase_Condition3   0.1993 0.0172 Inf  11.582  <.0001
##  Phrase_Condition2 - Phrase_Condition4  -0.1387 0.0155 Inf  -8.965  <.0001
##  Phrase_Condition3 - Phrase_Condition4  -0.3381 0.0151 Inf -22.441  <.0001
## 
## Game_Version = 3:
##  contrast                              estimate     SE  df z.ratio p.value
##  Phrase_Condition1 - Phrase_Condition2  -0.3163 0.0169 Inf -18.666  <.0001
##  Phrase_Condition1 - Phrase_Condition3  -0.1427 0.0162 Inf  -8.787  <.0001
##  Phrase_Condition1 - Phrase_Condition4  -0.4212 0.0140 Inf -30.036  <.0001
##  Phrase_Condition2 - Phrase_Condition3   0.1736 0.0163 Inf  10.660  <.0001
##  Phrase_Condition2 - Phrase_Condition4  -0.1049 0.0141 Inf  -7.453  <.0001
##  Phrase_Condition3 - Phrase_Condition4  -0.2785 0.0131 Inf -21.194  <.0001
## 
## Game_Version = 4:
##  contrast                              estimate     SE  df z.ratio p.value
##  Phrase_Condition1 - Phrase_Condition2  -0.1223 0.0158 Inf  -7.759  <.0001
##  Phrase_Condition1 - Phrase_Condition3   0.0108 0.0154 Inf   0.703  0.8960
##  Phrase_Condition1 - Phrase_Condition4  -0.2859 0.0129 Inf -22.089  <.0001
##  Phrase_Condition2 - Phrase_Condition3   0.1331 0.0156 Inf   8.523  <.0001
##  Phrase_Condition2 - Phrase_Condition4  -0.1636 0.0133 Inf -12.333  <.0001
##  Phrase_Condition3 - Phrase_Condition4  -0.2967 0.0126 Inf -23.589  <.0001
## 
## Degrees-of-freedom method: asymptotic 
## P value adjustment: tukey method for comparing a family of 4 estimates 
## 
## $`simple contrasts for Game_Version`
## Phrase_Condition = 1:
##  contrast                       estimate     SE  df z.ratio p.value
##  Game_Version1 - Game_Version2  0.016972 0.0593 Inf   0.286  0.9918
##  Game_Version1 - Game_Version3  0.062017 0.0579 Inf   1.072  0.7069
##  Game_Version1 - Game_Version4  0.017926 0.0577 Inf   0.311  0.9896
##  Game_Version2 - Game_Version3  0.045045 0.0577 Inf   0.780  0.8634
##  Game_Version2 - Game_Version4  0.000953 0.0575 Inf   0.017  1.0000
##  Game_Version3 - Game_Version4 -0.044091 0.0561 Inf  -0.786  0.8608
## 
## Phrase_Condition = 2:
##  contrast                       estimate     SE  df z.ratio p.value
##  Game_Version1 - Game_Version2 -0.162320 0.0593 Inf  -2.735  0.0317
##  Game_Version1 - Game_Version3 -0.118621 0.0579 Inf  -2.049  0.1701
##  Game_Version1 - Game_Version4  0.031259 0.0577 Inf   0.541  0.9489
##  Game_Version2 - Game_Version3  0.043699 0.0578 Inf   0.756  0.8742
##  Game_Version2 - Game_Version4  0.193578 0.0577 Inf   3.357  0.0044
##  Game_Version3 - Game_Version4  0.149880 0.0562 Inf   2.668  0.0382
## 
## Phrase_Condition = 3:
##  contrast                       estimate     SE  df z.ratio p.value
##  Game_Version1 - Game_Version2 -0.149879 0.0591 Inf  -2.534  0.0548
##  Game_Version1 - Game_Version3 -0.131909 0.0576 Inf  -2.291  0.1001
##  Game_Version1 - Game_Version4 -0.022499 0.0575 Inf  -0.391  0.9797
##  Game_Version2 - Game_Version3  0.017970 0.0575 Inf   0.313  0.9894
##  Game_Version2 - Game_Version4  0.127379 0.0575 Inf   2.217  0.1187
##  Game_Version3 - Game_Version4  0.109410 0.0559 Inf   1.959  0.2038
## 
## Phrase_Condition = 4:
##  contrast                       estimate     SE  df z.ratio p.value
##  Game_Version1 - Game_Version2 -0.102494 0.0581 Inf  -1.766  0.2900
##  Game_Version1 - Game_Version3 -0.024944 0.0564 Inf  -0.442  0.9711
##  Game_Version1 - Game_Version4  0.066230 0.0563 Inf   1.175  0.6424
##  Game_Version2 - Game_Version3  0.077551 0.0564 Inf   1.375  0.5150
##  Game_Version2 - Game_Version4  0.168724 0.0564 Inf   2.994  0.0146
##  Game_Version3 - Game_Version4  0.091174 0.0546 Inf   1.669  0.3404
## 
## Degrees-of-freedom method: asymptotic 
## P value adjustment: tukey method for comparing a family of 4 estimates
plot(emm1)

5. Does the rate of improvement in each condition depend on the version?

5a: rt ~ 1 + time + condition * version + (1 + time + condition | participant)

1 (Intercept): Represents the baseline reaction time for the first question and the baseline categories of Phrase_Condition and Game_Version.

Question_Num: Continuous variable representing the sequential number of questions. A negative coefficient indicates improved reaction time (i.e., decreasing reaction time) as the game progresses.

Phrase_Condition * Game_Version: Examines if the effect of phrase conditions on reaction time is modified by the game version, identifying combinations where game version influences the difficulty of phrase conditions.

(1 | Player_ID): Random intercepts for players account for individual differences in baseline reaction times, acknowledging that some players generally respond faster or slower than others, independent of condition or version.

model5 <- lmer(Reaction_Time ~ 1 + Question_Num + Phrase_Condition * Game_Version + (1 | Player_ID), data = data)

# Obtain and print a summary of the model to examine fixed and random effects
print(summary(model5))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction_Time ~ 1 + Question_Num + Phrase_Condition * Game_Version +  
##     (1 | Player_ID)
##    Data: data
## 
## REML criterion at convergence: 18849.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8973 -0.6912 -0.1075  0.5843  4.1400 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Player_ID (Intercept) 0.01431  0.1196  
##  Residual              0.13911  0.3730  
## Number of obs: 21488, groups:  Player_ID, 38
## 
## Fixed effects:
##                                   Estimate Std. Error         df t value
## (Intercept)                      6.381e-01  4.205e-02  4.061e+01  15.174
## Question_Num                     1.384e-04  2.986e-05  2.144e+04   4.636
## Phrase_Condition2                1.325e-01  1.812e-02  2.144e+04   7.309
## Phrase_Condition3               -6.598e-02  1.805e-02  2.144e+04  -3.656
## Phrase_Condition4                3.188e-01  1.608e-02  2.145e+04  19.827
## Game_Version2                   -3.516e-02  5.921e-02  3.989e+01  -0.594
## Game_Version3                   -7.726e-02  5.778e-02  4.010e+01  -1.337
## Game_Version4                   -2.110e-02  5.750e-02  3.932e+01  -0.367
## Phrase_Condition2:Game_Version2  2.003e-01  2.545e-02  2.144e+04   7.871
## Phrase_Condition3:Game_Version2  1.971e-01  2.541e-02  2.144e+04   7.757
## Phrase_Condition4:Game_Version2  1.424e-01  2.239e-02  2.145e+04   6.360
## Phrase_Condition2:Game_Version3  1.808e-01  2.479e-02  2.144e+04   7.293
## Phrase_Condition3:Game_Version3  2.244e-01  2.494e-02  2.144e+04   8.995
## Phrase_Condition4:Game_Version3  1.209e-01  2.231e-02  2.145e+04   5.421
## Phrase_Condition2:Game_Version4 -2.168e-02  2.407e-02  2.144e+04  -0.901
## Phrase_Condition3:Game_Version4  4.028e-02  2.349e-02  2.144e+04   1.715
## Phrase_Condition4:Game_Version4 -2.144e-02  2.118e-02  2.145e+04  -1.012
##                                 Pr(>|t|)    
## (Intercept)                      < 2e-16 ***
## Question_Num                    3.58e-06 ***
## Phrase_Condition2               2.78e-13 ***
## Phrase_Condition3               0.000257 ***
## Phrase_Condition4                < 2e-16 ***
## Game_Version2                   0.555975    
## Game_Version3                   0.188727    
## Game_Version4                   0.715573    
## Phrase_Condition2:Game_Version2 3.68e-15 ***
## Phrase_Condition3:Game_Version2 9.06e-15 ***
## Phrase_Condition4:Game_Version2 2.06e-10 ***
## Phrase_Condition2:Game_Version3 3.13e-13 ***
## Phrase_Condition3:Game_Version3  < 2e-16 ***
## Phrase_Condition4:Game_Version3 6.00e-08 ***
## Phrase_Condition2:Game_Version4 0.367788    
## Phrase_Condition3:Game_Version4 0.086359 .  
## Phrase_Condition4:Game_Version4 0.311457    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 17 > 12.
## Use print(summary(model5), correlation=TRUE)  or
##     vcov(summary(model5))        if you need it
#Diagnostic plots
plot_residuals_diagnostics(model5)

## `geom_smooth()` using formula = 'y ~ x'

# Diagnostic checks - Simulating residuals to validate model assumptions
residuals_simulation <- simulateResiduals(fittedModel = model5, n = 500)
plot(residuals_simulation)

# Model diagnostics
check_model(model5)

# Check for influential cases potentially affecting the model
#influence_measures <- influence(model5)
#plot(influence_measures, which = "cook")

# If assumptions are not met, consider re-fitting the model with transformations or different specifications
# model_revised <- update(model, . ~ . + log(Reaction_Time))
# print(summary(model_revised))

# Model interpretation - examining fixed and random effects
fixed_effects <- fixef(model5)
random_effects <- ranef(model5)

# Printing fixed effects for interpretation
print(fixed_effects)
##                     (Intercept)                    Question_Num 
##                    0.6381127707                    0.0001384314 
##               Phrase_Condition2               Phrase_Condition3 
##                    0.1324504693                   -0.0659754779 
##               Phrase_Condition4                   Game_Version2 
##                    0.3188284767                   -0.0351594821 
##                   Game_Version3                   Game_Version4 
##                   -0.0772602602                   -0.0211035151 
## Phrase_Condition2:Game_Version2 Phrase_Condition3:Game_Version2 
##                    0.2003138616                    0.1971203306 
## Phrase_Condition4:Game_Version2 Phrase_Condition2:Game_Version3 
##                    0.1424237445                    0.1808295797 
## Phrase_Condition3:Game_Version3 Phrase_Condition4:Game_Version3 
##                    0.2243767225                    0.1209361948 
## Phrase_Condition2:Game_Version4 Phrase_Condition3:Game_Version4 
##                   -0.0216796365                    0.0402806219 
## Phrase_Condition4:Game_Version4 
##                   -0.0214371040
# Printing random effects for interpretation
print(random_effects)
## $Player_ID
##     (Intercept)
## 1  -0.163105853
## 2  -0.144771169
## 3  -0.109213499
## 4   0.104317327
## 5   0.133742164
## 6  -0.023698190
## 7   0.041663737
## 8   0.006680579
## 9   0.154384903
## 10  0.041320520
## 11 -0.005030095
## 12  0.068213979
## 13 -0.042275508
## 14 -0.012387106
## 15  0.163320561
## 16  0.081750306
## 17 -0.284207392
## 18 -0.010705265
## 19 -0.058482394
## 20  0.008489376
## 21  0.055441845
## 22  0.021989135
## 23 -0.156603283
## 24 -0.014701342
## 25  0.073543785
## 26  0.143249845
## 27 -0.077776167
## 28  0.004849200
## 29  0.158797420
## 30 -0.015243028
## 31  0.108024919
## 32  0.076923790
## 33  0.075534000
## 34 -0.155164868
## 35  0.181760464
## 36 -0.073866835
## 37 -0.146811014
## 38 -0.209954848
## 
## with conditional variances for "Player_ID"
# Estimate marginal means at various levels of Question_Num
emm2 <- emmeans(model5, specs = ~ Phrase_Condition * Game_Version, at = list(Question_Num = c(1, 10, 20)))
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
summary(emm2)
##  Phrase_Condition Game_Version emmean     SE  df asymp.LCL asymp.UCL
##  1                1             0.640 0.0420 Inf     0.557     0.722
##  2                1             0.772 0.0421 Inf     0.690     0.854
##  3                1             0.574 0.0423 Inf     0.491     0.656
##  4                1             0.958 0.0415 Inf     0.877     1.040
##  1                2             0.604 0.0423 Inf     0.521     0.687
##  2                2             0.937 0.0419 Inf     0.855     1.019
##  3                2             0.736 0.0418 Inf     0.654     0.818
##  4                2             1.066 0.0413 Inf     0.985     1.147
##  1                3             0.562 0.0402 Inf     0.483     0.641
##  2                3             0.876 0.0404 Inf     0.796     0.955
##  3                3             0.721 0.0395 Inf     0.643     0.798
##  4                3             1.002 0.0386 Inf     0.926     1.078
##  1                4             0.618 0.0396 Inf     0.541     0.696
##  2                4             0.729 0.0400 Inf     0.651     0.808
##  3                4             0.593 0.0400 Inf     0.514     0.671
##  4                4             0.916 0.0385 Inf     0.840     0.991
## 
## Results are averaged over the levels of: Question_Num 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95
# Trend analysis
trend_analysis <- emtrends(model5, specs = ~ Phrase_Condition * Game_Version, var = "Question_Num")
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 21488' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 21488)' or larger];
## but be warned that this may result in large computation time and memory use.
summary(trend_analysis)
##  Phrase_Condition Game_Version Question_Num.trend       SE  df asymp.LCL
##  1                1                      0.000138 2.99e-05 Inf  7.99e-05
##  2                1                      0.000138 2.99e-05 Inf  7.99e-05
##  3                1                      0.000138 2.99e-05 Inf  7.99e-05
##  4                1                      0.000138 2.99e-05 Inf  7.99e-05
##  1                2                      0.000138 2.99e-05 Inf  7.99e-05
##  2                2                      0.000138 2.99e-05 Inf  7.99e-05
##  3                2                      0.000138 2.99e-05 Inf  7.99e-05
##  4                2                      0.000138 2.99e-05 Inf  7.99e-05
##  1                3                      0.000138 2.99e-05 Inf  7.99e-05
##  2                3                      0.000138 2.99e-05 Inf  7.99e-05
##  3                3                      0.000138 2.99e-05 Inf  7.99e-05
##  4                3                      0.000138 2.99e-05 Inf  7.99e-05
##  1                4                      0.000138 2.99e-05 Inf  7.99e-05
##  2                4                      0.000138 2.99e-05 Inf  7.99e-05
##  3                4                      0.000138 2.99e-05 Inf  7.99e-05
##  4                4                      0.000138 2.99e-05 Inf  7.99e-05
##  asymp.UCL
##   0.000197
##   0.000197
##   0.000197
##   0.000197
##   0.000197
##   0.000197
##   0.000197
##   0.000197
##   0.000197
##   0.000197
##   0.000197
##   0.000197
##   0.000197
##   0.000197
##   0.000197
##   0.000197
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95
plot(emm2)

Accuracy with logistic regression

# Define the function to plot diagnostics for logistic mixed-effects models
plotModelDiagnostics <- function(model, data) {
  # Deviance Residuals Plot
  dev_res <- resid(model, type = "deviance")
  plot(dev_res, ylim = c(min(dev_res), max(dev_res)), main = "Deviance Residuals")
  abline(h = 0, col = "red")
  
  # ROC Curve and AUC calculation
  probabilities <- predict(model, type = "response")
  actual <- data$Answer
  pred <- prediction(probabilities, actual)
  perf <- performance(pred, "tpr", "fpr")
  
  # Plot ROC Curve
  plot(perf, main = "ROC Curve")
  abline(0, 1, col = "blue")  # Adding diagonal line for reference
  
  # Calculate AUC and display it
  auc <- performance(pred, "auc")
  auc_value <- auc@y.values[[1]]
  cat("Area Under the Curve (AUC):", auc_value, "\n")
  
  # Simple plot just showing AUC value
  plot(1, type = "n", axes = FALSE, xlab = "", ylab = "", main = "AUC Value")
  text(1, 1, labels = paste("AUC =", round(auc_value, 3)), cex = 2)
}

# Example of using the function (you should already have a model fitted)
# plotModelDiagnostics(model1, data)

0. Accuracy marginalizing over everything

Accounting for variability among participants (participants as a random effect) Oa: ac ~ 1 + (1 | participant)

# Fit a basic logistic mixed-effects model
model0 <- glmer(Answer ~ 1 + (1 | Player_ID), data=data, family=binomial)

summary(model0)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Answer ~ 1 + (1 | Player_ID)
##    Data: data
## 
##      AIC      BIC   logLik deviance df.resid 
##  19426.2  19442.2  -9711.1  19422.2    21486 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3308  0.3398  0.4080  0.4852  0.6156 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Player_ID (Intercept) 0.147    0.3835  
## Number of obs: 21488, groups:  Player_ID, 38
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.69897    0.06555   25.92   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plotModelDiagnostics(model0, data)

## Area Under the Curve (AUC): 0.6125971

##Does learning take place?

1: ac - 1 + time + (1 + time | participant)

control <- glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1000000))
model1 <- glmer(Answer ~ 1 + Question_Num + (1 + Question_Num | Player_ID), data = data, family = binomial, control = control)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0920642 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
summary(model1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Answer ~ 1 + Question_Num + (1 + Question_Num | Player_ID)
##    Data: data
## Control: control
## 
##      AIC      BIC   logLik deviance df.resid 
##  19072.6  19112.4  -9531.3  19062.6    21483 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2920  0.2952  0.3987  0.4907  0.8663 
## 
## Random effects:
##  Groups    Name         Variance  Std.Dev. Corr 
##  Player_ID (Intercept)  4.367e-01 0.660848      
##            Question_Num 9.453e-06 0.003075 -0.85
## Number of obs: 21488, groups:  Player_ID, 38
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.4407382  0.1145751  12.575  < 2e-16 ***
## Question_Num 0.0021346  0.0005408   3.947 7.91e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## Question_Nm -0.843
## optimizer (bobyqa) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0920642 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
plotModelDiagnostics(model1, data)

## Area Under the Curve (AUC): 0.6584369

2. Which condition is hardest overall?

2: ac - 1 + condition + (1 + condition | participant)

control <- glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1000000))
model2 <- glmer(Answer ~ 1 + Phrase_Condition + (1 + Phrase_Condition | Player_ID), data = data, family = binomial, control = control)


# Output the summary of the model to see the results
summary(model2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Answer ~ 1 + Phrase_Condition + (1 + Phrase_Condition | Player_ID)
##    Data: data
## Control: control
## 
##      AIC      BIC   logLik deviance df.resid 
##  17124.4  17236.1  -8548.2  17096.4    21474 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.6838  0.1302  0.2550  0.6043  0.8368 
## 
## Random effects:
##  Groups    Name              Variance Std.Dev. Corr             
##  Player_ID (Intercept)       0.6505   0.8065                    
##            Phrase_Condition2 0.6693   0.8181   -0.71            
##            Phrase_Condition3 0.5456   0.7387   -0.10  0.07      
##            Phrase_Condition4 0.7101   0.8427   -0.95  0.78  0.27
## Number of obs: 21488, groups:  Player_ID, 38
## 
## Fixed effects:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         3.6326     0.1716  21.169  < 2e-16 ***
## Phrase_Condition2  -1.0569     0.1862  -5.677 1.37e-08 ***
## Phrase_Condition3  -0.6074     0.1870  -3.248  0.00116 ** 
## Phrase_Condition4  -2.7371     0.1777 -15.404  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Phr_C2 Phr_C3
## Phrs_Cndtn2 -0.772              
## Phrs_Cndtn3 -0.435  0.387       
## Phrs_Cndtn4 -0.961  0.797  0.502
plotModelDiagnostics(model2, data)

## Area Under the Curve (AUC): 0.7628776

##Does the version of the experiment cause different outcomes?

ac - 1 + version + ((1 | participant)

control <- glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1000000))
model3 <- glmer(Answer ~ 1 + Game_Version + (1 | Player_ID), data = data, family = binomial, control = control)


summary(model3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Answer ~ 1 + Game_Version + (1 | Player_ID)
##    Data: data
## Control: control
## 
##      AIC      BIC   logLik deviance df.resid 
##  19427.4  19467.3  -9708.7  19417.4    21483 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2821  0.3402  0.4049  0.4857  0.6153 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Player_ID (Intercept) 0.1276   0.3573  
## Number of obs: 21488, groups:  Player_ID, 38
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     1.9312     0.1276  15.131   <2e-16 ***
## Game_Version2  -0.2351     0.1796  -1.309   0.1905    
## Game_Version3  -0.3804     0.1739  -2.187   0.0288 *  
## Game_Version4  -0.2899     0.1742  -1.664   0.0961 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Gm_Vr2 Gm_Vr3
## Game_Versn2 -0.710              
## Game_Versn3 -0.732  0.520       
## Game_Versn4 -0.731  0.520  0.536
plotModelDiagnostics(model3, data)

## Area Under the Curve (AUC): 0.6125669

##Hoes does the effect of condition vary depending on the version?

ac - 1 + condition * version + (1 + condition | participant)

control <- glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1000000))
model4 <- glmer(Answer ~ 1 + Phrase_Condition * Game_Version + (1 | Player_ID), data = data, family = binomial, control = control)

summary(model4)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Answer ~ 1 + Phrase_Condition * Game_Version + (1 | Player_ID)
##    Data: data
## Control: control
## 
##      AIC      BIC   logLik deviance df.resid 
##  17377.0  17512.6  -8671.5  17343.0    21471 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.3529  0.1829  0.2815  0.5604  0.8339 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Player_ID (Intercept) 0.08701  0.295   
## Number of obs: 21488, groups:  Player_ID, 38
## 
## Fixed effects:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      3.29538    0.20492  16.081  < 2e-16 ***
## Phrase_Condition2               -0.58366    0.22782  -2.562 0.010409 *  
## Phrase_Condition3               -0.02402    0.24690  -0.097 0.922491    
## Phrase_Condition4               -2.23561    0.18820 -11.879  < 2e-16 ***
## Game_Version2                   -0.17268    0.27494  -0.628 0.529975    
## Game_Version3                    0.26339    0.29539   0.892 0.372568    
## Game_Version4                    0.03719    0.27202   0.137 0.891265    
## Phrase_Condition2:Game_Version2 -0.09807    0.29960  -0.327 0.743405    
## Phrase_Condition3:Game_Version2 -1.14246    0.30586  -3.735 0.000188 ***
## Phrase_Condition4:Game_Version2  0.05288    0.24940   0.212 0.832088    
## Phrase_Condition2:Game_Version3 -0.56761    0.31927  -1.778 0.075436 .  
## Phrase_Condition3:Game_Version3 -1.18155    0.32808  -3.601 0.000316 ***
## Phrase_Condition4:Game_Version3 -0.50828    0.27148  -1.872 0.061175 .  
## Phrase_Condition2:Game_Version4 -0.50645    0.29157  -1.737 0.082398 .  
## Phrase_Condition3:Game_Version4 -0.72037    0.30843  -2.336 0.019512 *  
## Phrase_Condition4:Game_Version4 -0.16336    0.24556  -0.665 0.505889    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
plotModelDiagnostics(model4, data)

## Area Under the Curve (AUC): 0.7478004